*************************************************************************
******************* CLEAN CAB DATA MERGED WITH WAVE 3 *******************
*************************************************************************
{ /*** Do File DESCRIPTION ***/
/*In this do-file, first there will be some more cleaning of the AHS data sets.
	Then the data will be appended to the DLHS4 data and further cleaning will be
	done until finally obtaining the final data set that will be used for data 
	analysis.

data sets used:
	merge_wave3 - merged AHS COMB and CAB data sets from wave 3
	DONT_OVERWRITE_AdultPopulationByDistrict - contains data on adult district population according to census 2011
	DLHS_mort - data set with the DLHS observations that is appended to the cleaned AHS data
	merge_success_districts - contains percentage of observations successfully merged, district wise
	AHS cab cleaned
	

data sets created:
temporary:
	mwave3_cleaning - merged wave 3 data after the general cleaning
	relativepopulation_distr_census - contains relative district populations according to census2011
	relativepopulation_distr_merged - contains relative district sample population in the merged AHS data
	merge_wave3_clean - cleaned AHS data including district sampling weight and asset variables
final:	
	AHSDLHS_w3_v2

variables created:
	adjusted glucose and diabetes variables (for not fasted AHS observations)
	ai_NAT								q_ai_nat_rurb
	ai_NAT_rurb							noassets
	asset variables 					religion
	broad variables						sweight (AHS)
	q_ai_nat							venous (i.e. adjusted) glucose variables
	caste								psu
	
macros used:
	temp: folder contains data sets generated in this do file but which are not
		needed anymore at a later point in time
	final: folder contains the final data sets
	pop: folder contains population data from Census2011

*/ }
* session setup: switch off the -more- prompt and define the folder globals
set more off

* pop:   population data from Census2011
* final: final data sets
* temp:  intermediate data sets that are not needed later on
global pop "C:\Users\Michaela\Desktop\rawdata\population data\"
global final "C:\Users\Michaela\Desktop\manipulated data\final\"
global temp "C:\Users\Michaela\Desktop\manipulated data\temp\"

* start from the merged wave 3 data, replacing whatever is in memory
use "$final\merge_wave3", clear
*******************************************************************************
************************** CLEANING AHS DATA SET ******************************
*******************************************************************************
{ 
{ /***** GENERAL CLEANING *****/
* remove the variables that contain nothing but missing values
	drop fidh usual_residance_reason v54 comment isdeadmigrated ///
		hl_id wt fidx as as_binned rtelephoneno v104

* religion: one indicator per category of the religion variable
	tabulate religion, generate(rel)

	forvalues k = 1/8 {
		label variable rel`k' "hh head belongs to respective religion"
		label values rel`k' yesno
	}

	rename (rel1 rel2 rel3 rel4 rel5 rel6 rel7 rel8) ///
		(hindu muslim christian sikh buddhist jain otherreligion noreligion)

* "broad" indicators: 1 if the narrow indicator or the treatment indicator
* equals 1, 0 otherwise; missing only if both source variables are "."
	foreach r in avg sec {
		generate htn_broad_`r' = cond(htn_narrow_`r' == . & htn_treated == ., ., ///
			htn_narrow_`r' == 1 | htn_treated == 1)
	}

	generate diab_broad = cond(diab_narrow == . & diab_treated == ., ., ///
		diab_narrow == 1 | diab_treated == 1)

* caste category (copy of the social group code)
	generate caste = social_group_code

* Primary Sampling Unit
* fid is turned into a string; three characters are cut out of it, starting
* at position 4 when state <= 9 and at position 5 otherwise
	tostring fid, replace format("%13.0f")

	generate psu1 = cond(state <= 9, substr(fid, 4, 3), substr(fid, 5, 3))
	destring psu1, replace

* psu = 2-digit state + 2-digit district + 3-digit psu1, stored as a number
	generate psu = string(state, "%02.0f") + string(district, "%02.0f") + string(psu1, "%03.0f")
	destring psu, replace
	drop psu1

* value labels
	foreach b in htn_broad_avg htn_broad_sec diab_broad {
		label values `b' yesno
	}

	label define caste1 1 "SC" 2 "ST" 3 "Other/None"
	label values caste caste1

* variable labels
	label variable htn_broad_avg "suffers from hypertension (avg. reading) or takes medication"
	label variable htn_broad_sec "suffers from hypertension (second reading) or takes medication"
	label variable diab_broad "suffers from diabetes according to test result or takes medication"
	label variable caste "caste category"
	label variable psu "Primary Sampling Unit"

* skip pattern in the educ variable: everybody with age1 below 7 gets code 1
	replace educ = 1 if age1 < 7

save "$temp\mwave3_cleaning", replace

}
}
{ /* MORTALITY VARIABLE WITH DATA FROM MORT DATA SET */ 
/* Purpose: builds household-level infant and under-5 mortality rates
	(infmortrate, u5mortrate) from the death counts total_infdeath and
	total_u5death and from the time the household members spent below
	age 1 / age 5, measured against the reference period.

	NOTE(review): this block ends without a -save-, and the next block starts
	with -use ..., clear-. As written, the variables created here are not
	carried over into the data that is saved later -- confirm whether this
	is intended before relying on infmortrate / u5mortrate.
*/
use "$temp\mwave3_cleaning", clear

/* The reference period is 01.01.2011 - 31.12.2011. Hence, the reference period
	has a length of 365 days.
*/
	gen refdays = 365
	lab var refdays "number of days in reference period"

* beginning and end of reference period
* (date string -> datetime via clock() -> daily date via dofc())
	gen startrefdate_1 = "2011/01/01"
	gen double startrefdate_2 = clock(startrefdate_1, "YMD")
	gen refstart = dofc(startrefdate_2)
	format refstart %td
	drop startrefdate*
	lab var refstart "reference period started on 01/01/2011"

	gen endrefdate_1 = "2011/12/31"
	gen double endrefdate_2 = clock(endrefdate_1, "YMD")
	gen refend = dofc(endrefdate_2)
	format refend %td
	drop endrefdate*
	lab var refend "reference period ended on 31/12/2011"

* birth date
* the existing bday/bmonth/byear are kept under a _cab suffix; new ones are
* built from date_of_birth / month_of_birth / year_of_birth, with days
* outside 1-31 and months outside 1-12 set to missing
	rename (bday bmonth byear) (bday_cab bmonth_cab byear_cab)
	gen bday = date_of_birth
	replace bday = . if bday > 31 | bday == 0
	
	gen bmonth = month_of_birth
	replace bmonth = . if bmonth > 12 | bmonth == 0
	
	gen byear = year_of_birth
	
	gen bdate = mdy(bmonth, bday, byear)
	format bdate %td
	
* age at end of reference period
	gen age4 = refend - bdate
	lab var age4 "age in days at end of reference period"
	
* dummy for infant (missing if age4 is missing)
	gen infant = 1 if age4 < 365
	replace infant = 0 if age4 >= 365 & age4 != .
	lab var infant "indiv was < 1y at end of reference period"
	
* dummy for U5 (missing if age4 is missing)
	gen u5 = 1 if age4 < 1825
	replace u5 = 0 if age4 >= 1825 & age4 != .
	lab var u5 "indiv was <5y at end of reference period"

* days lived in reference period
/* individuals that are infants at the end of the reference period lived exactly this number
		of days during the reference period.
		
	The same is true for children who are younger than 5

	NOTE(review): for u5 == 1, age4 can be larger than 365 (child born before
	the reference period started), so refdays_u5 can exceed the length of
	the reference period -- confirm the intended definition.
*/
	gen refdays_infant = age4 if infant == 1
	lab var refdays_infant "number of days being <1y lived during reference period"
	
	gen refdays_u5 = age4 if u5 == 1
	lab var refdays_u5 "number of days being <5y lived during reference period"

/* there are individuals that were born before the reference period started
	and turned 1 (or 5) during the reference period (i.e. they were born after 
	01.01.2010 or 01.01.2006 respectively). 
	For these observations, refdays_infant (refdays_u5) will take the value 
	of days that the individual was younger than 1 (younger than 5) during the 
	reference period.

	NOTE(review): 1825 - (refstart - bdate) is the number of days left until
	the 5th birthday counted from refstart; it is not capped at 365 here.
	NOTE(review): both inequalities are strict, so bdate equal to 01jan2010
	(01jan2006) is covered neither here nor by the "= 0" rule below.
*/
	replace refdays_infant = 365 - (refstart - bdate) if bdate < td(01jan2011) & bdate > td(01jan2010) & bdate != .
	
	replace refdays_u5 = 1825 - (refstart - bdate) if bdate < td(01jan2011) & bdate > td(01jan2006) & bdate != .

	
/* refdays_infant (refdays_u5) will take the value of 0 if an individual was not 
		younger than 1y (5y) at any point in time during the reference period
*/
	replace refdays_infant = 0 if bdate < td(01jan2010)
	
	replace refdays_u5 = 0 if bdate < td(01jan2006)
	

* proportion of number of days being <1y (<5y) during reference period
* (refdays_infant is divided by 365, refdays_u5 by 1825)
	gen prop_infant = refdays_infant / 365
	lab var prop_infant "proportion of time being <1y during reference period"
	
	gen prop_u5 = refdays_u5 / 1825
	lab var prop_u5 "proportion of time being <5y during reference period"
	
* sum up proportions per HH
* the -if- leaves the household sum missing for members whose proportion is
* missing and whose age1 is below 1 (below 5)
	bysort hhid: egen hhprop_infant = total(prop_infant) if prop_infant != . | (prop_infant == . & age1 >= 1)
		
	bysort hhid: egen hhprop_u5 = total(prop_u5) if prop_u5 != . | (prop_u5 == . & age1 >= 5)
		
* mortality rates
/* total_infdeath is the number of deceased infants in this household. Data was
	taken from the MORT data set.
	
	total_u5 death is the number of deceased children under the age of 5. Data
		was taken from the MORT data set

	rate = deaths / (deaths + summed household proportions)
*/
	by hhid: gen infmortrate = total_infdeath / (total_infdeath + hhprop_infant)
	* people who have infants but no death in ref period, have a death rate of 0
	replace infmortrate = 0 if total_infdeath == . & !inlist(hhprop_infant, 0, .)
	
	by hhid: gen u5mortrate = total_u5death / (total_u5death + hhprop_u5)
	* people who have u5 children but no death in ref period, have a death rate of 0
	replace u5mortrate = 0 if total_u5death == . & !inlist(hhprop_u5, 0,.)

	* drop variables that will not be needed
	drop total_infdeath infant refdays_infant prop_infant hhprop_infant age3 ///
		refdays refend bday bmonth byear bdate age4 u5 refdays_u5 prop_u5 hhprop_u5 ///
		child adult b1m b11m a1y infant_death u5_death total_u5death refstart
}



{ /*** SAMPLING WEIGHTS ***/
/* see do-file "comparing adjusted size of districts with which we compared the
	relative number of observations per district in our merged data set to the 
	relative frequencies in Census2011 data.
	the results showed that even after adjusting with a sampling weight, that 
	adjusted for the observations that were lost in the merging process, 
	the districts as well as the states are not represented proportionally 
	in our sample. 
	Hence, we have to generate a sampling weight that takes into account the 
	relative size of each district according to the Census 2011 data as well as 
	the proportion in our final sample after merging.
	For this purpose we will use data obtained from http://www.censusindia.gov.in/pca/default.aspx 
	The district codes from the AHS were entered manually. The resulting data set
	will be used for calculating the population ratio of each district.

	Steps:
	1. relative adult population per district from the census file
	2. relative number of observations (dropage == 0) per district in the
	   merged data
	3. merge both onto the cleaned data and build
	   relweight = census share / sample share
*/

/* 1. relative district size according to the census */
use "$pop\DONT_OVERWRITE_AdultPopulationByDistrict", clear
	rename district_AHS district
	
* total adult population over all districts contained in the census file
* NOTE(review): this total is computed BEFORE the districts below are dropped,
* although its label speaks of "AHS districts" -- confirm that this is intended.
	egen totaladultpop_cens = total(adultpopdistr)
	format totaladultpop_cens %15.0g

* drop districts that are not included in AHS Data
	drop if district > 1000
	drop if state == 5 & (district == 2 | district == 3)
	drop if state == 10 & inlist(district, 10, 12, 15, 18)
	drop if state == 20 & (district == 4 | district == 18)
		
	gen relpop_cens = adultpopdistr / totaladultpop_cens
	
	lab var totaladultpop_cens "total number of adults living in AHS districts, Census2011"
	lab var relpop_cens "relative adult population per district, Census2011"
	
	keep state district relpop_cens totaladultpop_cens

save "$temp\relativepopulation_distr_census", replace
	
/* 2. calculate the relative number of observations in the data after merging */

use "$temp\mwave3_cleaning", clear
	keep if dropage == 0

* number of observations per district, then reduce to one row per district
	bysort state_dist: gen districtadultpop = _N
	by state_dist: keep if _n == 1
	
	egen totaladultpop_cab = total(districtadultpop)
	
	gen relpop_cab = districtadultpop / totaladultpop_cab
	
	keep state district relpop_cab totaladultpop_cab 
	
	lab var totaladultpop_cab "total number of adults living in AHS districts, merged data"
	lab var relpop_cab "relative adult population per district, AHS merged data"
		
save "$temp\relativepopulation_distr_merged", replace


/* 3. merge our data set with the relative district size data sets */
use "$temp\mwave3_cleaning", clear

* keep(master match): districts that only exist in the using file must not be
* added to the individual-level data as (otherwise empty) extra observations
	merge m:1 state district using "$temp\relativepopulation_distr_census", ///
		keep(master match) nogenerate
	
	merge m:1 state district using "$temp\relativepopulation_distr_merged", ///
		keep(master match) nogenerate
	
	gen relweight = relpop_cens / relpop_cab
	lab var relweight "district sampling weight"

/*
	testing whether now the observations are represented proportionally
		keep if dropage == 0
		destring state_dist, gen(sdist)
		bysort state_dist: gen distsize = _n
		by state_dist: replace distsize = [_N]
		gen adjdist = distsize*relweight
		br state_dist adjdist
		
		bysort state_dist: gen distno = 1 if _n == 1
		keep if distno == 1
		
		* compare the relative district sizes from the census and the one created here
		* the relative sizes are identical --> it worked
*/
/* generate a variable for the ratio of the sample size wrt the whole population
	in all AHS states. This will be needed to later adjust for AHS and DLHS sample
	sizes
*/
	gen sampleratio_AHS = totaladultpop_cab / totaladultpop_cens
	lab var sampleratio_AHS "Sample/Census; needed for later calculations"
	drop relpop_cens relpop_cab totaladultpop_cab totaladultpop_cens
}	


{ /*** VARIABLES FOR ASSET INDEX***/
/* Every variable below is a 0/1 indicator built from one raw household
	variable. Missing raw values give 0 at this stage.
*/

* improved water source: piped water into dwelling/yard/plot, public tap / stand pipe, hand pump, tube well / bore hole, protected dug well
	gen impwater = inlist(drinking_water_source, 1,2,3,4,5)
	
* notshared: indicates that toilet is not shared
/* in 40% of the cases the shared variable is missing. As I don't have the
	questionnaire, I assume that the missings are caused by a skip pattern.
	Only 10% of the observations indicate that they share the sanitation
	facility. This is not very likely to correspond to the truth. Hence,
	missing values are coded as "shared" (notshared == 0).
*/
	gen notshared = (is_toilet_shared == 2)
	
* impsani: (pour) flush latrine to sewer system, septic tank or pit latrine, VIP, pit latrine with slab
* only counted as improved if the facility is not shared
	gen impsani = (inlist(toilet_used, 1,2,3,5,6) & notshared == 1)
	
* fuel: LPG, electricity, biogas
	gen fuel = inlist(cooking_fuel, 6,7,8)
		
* house: pucca 
	gen house = (house_structure == 1)
	
* light: electricity or solar
	gen light = inlist(lighting_source, 1,3)
	
* owner: house is owned
	gen owner = (owner_status == 1)

* assets
	gen radio = (is_radio == 1)
	gen tv = (is_television == 1)
	gen phone = inlist(is_telephone, 1,2,3)
	gen refri = (is_refrigerator == 1) 
	gen bike = (is_bicycle == 1) 
	gen scooter = (is_scooter == 1)
	gen car = (is_car == 1)
	gen comp = inlist(is_computer, 1,2)
	gen wash = (is_washing_machine == 1)
	gen sewing = (is_sewing_machine == 1)
	gen land = (land_possessed < 6)

* label variables
	lab var impwater "HH has access to improved water supply"
	lab var notshared "sanitation facility is not shared"
	lab var impsani "HH uses improved sanitation facility"
	lab var fuel "HH uses LPG, elec, biogas for cooking"
	lab var house "house is pucca"
	lab var light "HH uses elec or solar for lighting"
	lab var owner "house is owned"
	lab var radio "HH owns radio"
	lab var tv "HH owns TV"
	lab var phone "HH owns mobile/landline phone"
	lab var refri "HH owns fridge"
	lab var bike "HH owns bike"
	lab var scooter "HH owns scooter"
	lab var car "HH owns car"
	lab var comp "HH owns computer"
	lab var wash "HH owns washing machine"
	lab var sewing "HH owns sewing machine"
	lab var land "HH owns any land"
	lab val notshared yesno

save "$temp\merge_wave3_clean", replace
}

*******************************************************************************
******************************** ASSET INDEX **********************************
*******************************************************************************
{ /* Dealing with the missings */
/* 12% of the observations have missings in most of the asset variables.
	This is checked first. Afterwards an indicator variable is created
	that allows us to exclude these households from the analysis */

* raw asset variables (everything except is_radio, which is the reference)
	local rawassets drinking_water_source is_toilet_shared toilet_used cooking_fuel ///
		house_structure lighting_source owner_status is_television is_telephone ///
		is_refrigerator is_bicycle is_scooter is_car is_computer is_washing_machine ///
		is_sewing_machine land_possessed

* distribution of every raw asset among observations with missing is_radio
	foreach v of local rawassets {
		tabulate `v' if is_radio == ., missing
	}

/* 
1.6 % of these observations have missings in all other assets except in 
	cooking fuel and toilet_used. This seems to be systematic as they all
	contain "0" (plus 1 value that is not missing in 4 other assets). 
The remaining observations have missings in all assets. --> An indicator variable
	will be created using missings in is_radio as decision rule
*/
	generate noassets = is_radio == .
	label variable noassets "observation is excluded from PCA"
	
	
/* count the missings among the observations that do not have missings in
	all assets. is_toilet_shared is left out because of the skip pattern
	described where notshared is generated */
	local skipped is_toilet_shared
	local checkassets : list rawassets - skipped

	foreach v of local checkassets {
		mdesc `v' if noassets == 0
	}
/* result: there are no missings in any of the assets if noassets == 0 */

	
/* set the generated asset variables to missing for observations with
	noassets == 1 to avoid later confusion */
	local genassets impwater notshared impsani fuel house light owner radio tv ///
		phone refri bike scooter car comp wash sewing land

	foreach v of local genassets {
		replace `v' = . if noassets == 1
	}
}

********************************************************************************
************************ NATION WIDE ASSET INDEX *******************************
********************************************************************************
{
/* Appends the DLHS data and computes a nation-wide asset score (ai_NAT, first
	principal-component factor of the asset indicators) plus its quintiles
	(q_ai_nat) for all observations with noassets == 0.
*/

	append using "$final\DLHS_mort" 
	replace noassets = 0 if noassets == . // for DLHS observations this variable is missing --> we set it 0

* drop  obsAHS state_no observ obs_strurb obsDLHS obsDLHS4rur // variables created before to calculate asset index on AHS level

* running number of the observations that enter the index (shown for information)
	gen obsNAT = _n if noassets == 0
		
	local assets impwater impsani fuel house light radio tv phone refri bike scooter car owner comp wash sewing land

		di "`assets'"
		sum obsNAT
		di "`r(max)'"

* remove assets without any variation in the estimation sample
/* the original check -count if `v' == !.- evaluated to -count if `v' == 0-
	(!. is 0), so an asset that is 1 for everybody was never tested. The check
	now asks whether the asset has any non-missing value in the sample that
	is used by -factor-. */
	foreach v in `assets' {
		quietly count if !missing(`v') & noassets == 0
		if r(N) != 0 {
			quietly summarize `v' if noassets == 0
			di "`r(sd)'"
			if r(sd) == 0 {
				di "`v' has no variation and will not be included in factor analysis"
				local assets: list assets - v 
				di "new assets: `assets'"
			}
		}
	}
	
	display "`assets'"	

* one factor, principal-component factoring; -capture noisily- shows an error
* without stopping the do-file at this line
	capture noisily factor `assets' if noassets == 0, factors(1) pcf
	capture noisily	predict ai_NAT if noassets == 0
	lab var ai_NAT "asset score; all DLHS and AHS states"

*** Quintiles
	sort ai_NAT
	xtile q_ai_nat = ai_NAT if noassets == 0, nq(5)
	lab var q_ai_nat "quintiles, national level"
}
**************************************************************************
************ CREATE NATION WIDE RURAL_URBAN ASSET INDEX ******************
**************************************************************************
{
/* Computes the asset score and its quintiles separately for rural == 0 and
	rural == 1 and then combines both into ai_NAT_rurb / q_ai_nat_rurb.
*/
	bysort rural: gen obsNATrur = _n

	forvalues rur = 0/1 {
		di `rur'
		* the asset list is reset for every group because assets may be
		* removed from it below
		local assets impwater impsani fuel house light radio tv phone refri bike scooter car owner comp wash sewing land
			di "`assets'"
			sum obsNATrur if rural == `rur'
			di "`r(max)'"

		* remove assets without any variation within this group
		/* the original check -count if `v' == !.- evaluated to
			-count if `v' == 0- (!. is 0), so an asset that is 1 for
			everybody in the group was never tested */
			foreach v in `assets' {
				quietly count if !missing(`v') & rural == `rur'
				if r(N) != 0 {
						quietly summarize `v' if rural == `rur'
						di "`r(sd)'"
						if r(sd) == 0 {
							di "`v' has no variation and will not be included in factor analysis"
							local assets: list assets - v 
							di "new assets: `assets'"
						}
				}
			}
		di "`assets'"	

		di `rur'

		* one factor, principal-component factoring, within this group only
		capture noisily	factor `assets' if rural == `rur', factors(1) pcf
		capture noisily	predict ai_NAT_rurbt`rur' if rural == `rur'
		
* Quintiles
		sort ai_NAT_rurbt`rur'
		xtile q_ai_nat_rurb`rur' = ai_NAT_rurbt`rur' if rural == `rur', nq(5)
		lab var q_ai_nat_rurb`rur' "quintiles, national level, rural == `rur'"
	}		

* reducing number of variables: the rural == 1 values are filled into the
* rural == 0 variables, which then get their final names
	replace ai_NAT_rurbt0 = ai_NAT_rurbt1 if ai_NAT_rurbt0 == .
	rename ai_NAT_rurbt0 ai_NAT_rurb
	
	replace q_ai_nat_rurb0 = q_ai_nat_rurb1 if q_ai_nat_rurb0 == .
	rename q_ai_nat_rurb0 q_ai_nat_rurb
	
	drop obsNAT obsNATrur ai_NAT_rurbt1 q_ai_nat_rurb1
}

		
{ /*** new glucose variables ***/
/* Builds the unadjusted (capillary) glucose and diabetes variables for AHS
	(DLHS == 0) and DLHS (DLHS == 1) observations, renames the existing
	variables to *_adj, and adds "_nf" variants that treat all AHS
	observations as not fasted (cut-off 200 instead of 126).
*/
* unadjusted (capillary) blood sugar level
* AHS: positive readings; DLHS: readings between 40 and 600
	gen glucose_unadj = fasting_blood_glucose_mg_dl if fasting_blood_glucose_mg_dl > 0 & DLHS == 0
	replace glucose_unadj = hv91a if hv91a >= 40 & hv91a <= 600 & DLHS == 1
	replace glucose_unadj = . if dropage == 1 | pregnant == 1
	
	gen glucgrt200_unadj = 1 if glucose_unadj >= 200 & glucose_unadj != .
	replace glucgrt200_unadj = 0 if glucose_unadj < 200
	
* DLHS: cut-off 126 if fasted, 200 otherwise
* the whole fasted/not-fasted condition is put in parentheses: & binds more
* tightly than |, so without them DLHS == 1 only applied to the second part
	gen diab_narrow_unadj = 1 if ((fasted == 1 & glucose_unadj >= 126) | (fasted != 1 & glucose_unadj >= 200)) & glucose_unadj != . & DLHS == 1
	replace diab_narrow_unadj = 0 if ((fasted == 1 & glucose_unadj < 126) | (fasted != 1 & glucose_unadj < 200)) & DLHS == 1
* AHS: cut-off 126
* "< 126" (not "<= 126"): a reading of exactly 126 must keep the value 1 that
* is assigned by the ">= 126" rule in the line before
	replace diab_narrow_unadj = 1 if glucose_unadj >= 126 & glucose_unadj != . & DLHS == 0
	replace diab_narrow_unadj = 0 if glucose_unadj < 126 & DLHS == 0
	
* broad: test result or treatment; missing only if both are "."
	gen diab_broad_unadj = (diab_narrow_unadj == 1 | diab_treated == 1) 
	replace diab_broad_unadj = . if diab_narrow_unadj == . & diab_treated == .

/* now we have decided to use the unadjusted variables for the analysis. For convenience,
	the variables will be renamed */
	
	rename (glucose glucgrt200 diab_narrow diab_broad) (glucose_adj glucgrt200_adj diab_narrow_adj diab_broad_adj)
	rename (glucose_unadj glucgrt200_unadj diab_narrow_unadj diab_broad_unadj) (glucose glucgrt200 diab_narrow diab_broad)
	
* assuming all AHS observations to be not fasted
	* first use (unadjusted) capillary blood sugar level
	gen diab_narrow_nf = 1 if glucose >= 200 & glucose != . & DLHS == 0
	replace diab_narrow_nf = 0 if glucose < 200 & DLHS == 0
	replace diab_narrow_nf = diab_narrow if DLHS == 1
	
	gen diab_broad_nf = ((diab_narrow_nf == 1 | diab_treated == 1) & DLHS == 0)
	replace diab_broad_nf = . if diab_narrow_nf == . & diab_treated == . & DLHS == 0
	replace diab_broad_nf = diab_broad if DLHS == 1
	
	* now use (adjusted) venous blood sugar level
	gen diab_narrow_adj_nf = 1 if glucose_adj >= 200 & glucose_adj != . & DLHS == 0
	replace diab_narrow_adj_nf = 0 if glucose_adj < 200 & DLHS == 0
	replace diab_narrow_adj_nf = diab_narrow_adj if DLHS == 1
	
	gen diab_broad_adj_nf = ((diab_narrow_adj_nf == 1 | diab_treated == 1) & DLHS == 0)
	replace diab_broad_adj_nf = . if diab_narrow_adj_nf == . & diab_treated == . & DLHS == 0
	replace diab_broad_adj_nf = diab_broad_adj if DLHS == 1
		
* move index, household and mortality variables to the end of the data set
	order ai_NAT ai_NAT_rurb q_ai_nat q_ai_nat_rurb hhsize child adult childhh ///
		adulthh femalehh malehh mort_women MORT WOMAN, last
}


/*** FINAL SAMPLING WEIGHTS ***/
/* Spreads the two sample ratios over all observations, builds the AHS
	sampling weight from the district weight and the DLHS/AHS ratio, labels
	the final variables and saves the final data set.
*/
* copy sampleratio values to all observations
* (sorting puts missing values last, so observation 1 holds the smallest
* non-missing value, which is then written to every observation)
	sort sampleratio_AHS
	replace sampleratio_AHS = sampleratio_AHS[1]
	sort sampleratio_DLHS
	replace sampleratio_DLHS = sampleratio_DLHS[1]

* generate ratio of DLHS to AHS sample ratio
	gen upweighfac = sampleratio_DLHS / sampleratio_AHS
	lab var upweighfac "upweighting factor for AHS states"
	
* generate sweight for AHS
	replace sweight = relweight*upweighfac if DLHS == 0
	
* drop helper variables that are not part of the final data set
drop sampleratio_DLHS sampleratio_AHS child male adult b1m b11m a1y /// 
	infant_death u5_death total_infdeath total_u5death sday smonth syear

* labelling variables
	lab var MORT "data from MORT(AHS)/HH(DLHS) data sets"
	lab var WOMAN "data from WOMAN(AHS)/EW(DLHS) data sets"
	lab var mort_women "mortality ratio; total deaths/total births; not age specific; WOMAN(AHS)/EW(DLHS) data sets"
	lab var ai_NAT_rurb "asset score rural-urban; all AHS and DLHS states"
	lab var q_ai_nat_rurb "quintiles national level, rural urban"
	lab var glucose "unadjusted (capillary) blood sugar level"
	lab var glucgrt200 "unadjusted (capillary) blood sugar level above 200"
	lab var diab_narrow "suffers from diabetes according to test result (capillary)"
	lab var diab_broad "suffers from diabetes according to test result (capillary) or takes medication"
	lab var diab_broad_adj "suffers from diabetes according to test result (venous) or takes medication"
	lab var diab_narrow_adj "suffers from diabetes according to test result (venous)"
	lab var glucgrt200_adj "adjusted (venous) blood sugar level above 200"
	lab var glucose_adj "adjusted (venous) blood sugar level"
	lab var diab_narrow_nf "capillary; assume: nobody fasted in AHS; no change for DLHS"
	lab var diab_broad_nf "capillary; assume: nobody fasted in AHS; no change for DLHS"
	lab var diab_narrow_adj_nf "venous; assume: nobody fasted in AHS; no change for DLHS"
	lab var diab_broad_adj_nf "venous; assume: nobody fasted in AHS; no change for DLHS"
	lab var hemoglobin "hemoglobin in g/dL"
	lab var anaemia "indiv suffers from anaemia"
	lab var sevanaemia "indiv suffers from severe anaemia"
	
save "$final\AHSDLHS_w3_v2", replace




